# encoding: utf-8
import re

import scrapy
from scrapy.selector import Selector
from job import items
class MySpider(scrapy.Spider):
    """Spider that fetches one data table per fund code from hexun.com.

    One start URL is built for every fund code listed in ``fund_code_path``;
    the queried date range is read from ``time_path``.  ``parse`` converts
    each table row of a response into an ``items.FundItem``.
    """

    name = "job"
    allowed_domains = ["funds.hexun.com"]
    # start_urls = ["http://jingzhi.funds.hexun.com/DataBase/jzzs.aspx?fundcode=270021&startdate=2016-01-01&enddate=2017-03-09"]
    start_urls = []

    # Input files: one fund code per line, and a single "start,end" date line.
    fund_code_path = "D:/gp/gitWorkspace/jobAnalysis/job/job/spiders/fund_code.txt"
    time_path = "D:/gp/gitWorkspace/jobAnalysis/job/job/spiders/time.txt"

    # Date range used for any field the time file does not provide.
    default_start_date = "2015-01-01"
    default_end_date = "2017-03-31"

    url_template = ("http://jingzhi.funds.hexun.com/DataBase/jzzs.aspx"
                    "?fundcode={code}&startdate={start}&enddate={end}")

    def __init__(self, *args, **kwargs):
        """Initialise the base spider, then build ``start_urls`` from disk.

        Positional and keyword arguments are forwarded to ``scrapy.Spider``
        so that spider arguments passed by the framework do not raise
        ``TypeError``.
        """
        super(MySpider, self).__init__(*args, **kwargs)
        self.start_urls = self.set_url()

    def set_url(self):
        """Return the list of URLs to crawl, one per non-blank fund code.

        The first line of ``time_path`` is split on ``","``; the first field
        is the start date and the second the end date.  A missing or empty
        field falls back to ``default_start_date`` / ``default_end_date``.

        Raises:
            IOError: if either input file cannot be opened.
        """
        start_date = self.default_start_date
        end_date = self.default_end_date

        with open(self.time_path, "r") as time_file:
            # strip() removes the trailing newline that readline() keeps and
            # that would otherwise end up inside every URL.
            fields = [field.strip()
                      for field in time_file.readline().split(",")]
        if fields[0]:
            start_date = fields[0]
        if len(fields) > 1 and fields[1]:
            end_date = fields[1]
        print(start_date)
        print(end_date)

        url_list = []
        with open(self.fund_code_path, "r") as code_file:
            for line in code_file:
                code = line.strip()
                if not code:
                    # A blank line would yield a URL with an empty fundcode.
                    continue
                url_list.append(self.url_template.format(
                    code=code, start=start_date, end=end_date))
        print(url_list)
        return url_list

    @staticmethod
    def _cell_text(cell):
        """Return the first text node directly under ``cell``, or ``""``."""
        texts = cell.xpath('./text()').extract()
        return texts[0] if texts else ""

    def parse(self, response):
        """Build one ``items.FundItem`` per ``//tbody/tr`` row of ``response``.

        The fund code is taken from the ``fundcode=<digits>`` part of
        ``response.url``; it is the empty string when the URL has none.
        Rows with fewer than four ``td`` cells are skipped.

        Returns:
            list: the populated ``FundItem`` objects, in document order.
        """
        match = re.search(r"fundcode=(\d+)", response.url)
        if match:
            fund_code = match.group(1)
            print(fund_code)
        else:
            # Keep going with an empty code instead of failing on an
            # undefined name once the first row is processed.
            fund_code = ""

        fund_items = []
        for row in Selector(response).xpath('//tbody/tr'):
            cells = row.xpath('.//td')
            print(len(cells))
            if len(cells) < 4:
                # Not a complete data row; emitting it would require reusing
                # values left over from a previous row.
                continue

            fund_item = items.FundItem()
            fund_item['issue_date'] = self._cell_text(cells[0])
            fund_item['per_net'] = self._cell_text(cells[1])
            fund_item['total_net'] = self._cell_text(cells[2])
            # Every cell from the fourth onward used to overwrite
            # growth_rate, so the last cell of the row is the one that counts.
            fund_item['growth_rate'] = self._cell_text(cells[-1])
            fund_item['fname_fund_code'] = fund_code
            fund_items.append(fund_item)
        return fund_items

            # for td in tds:
            #     tdprint = td.xpath('./text()').extract();
            #     list.append(tdprint);
            # print(list)

